library(dplyr)
library(tsibble)
library(readxl)
library(forecast)
library(fpp2)
library(lubridate)
library(survival)
library(survminer)
library(ggsurvfit)
library(gtsummary)
library(tidycmprsk)
#library(condsurv)



Speech_data<- read_excel("Houthis_Speech_Data.xlsx")
Interval_data<- read_excel("Houthis.xlsx")
################################################################################
#BACE#
################################################################################

#dataframe for graphing
dataGraphing = data.frame(matrix(nrow=0,ncol=2))
columns= c("threshold","pvalue")
colnames(dataGraphing)=columns


# We want to do survival analysis for different threshold values of BACE
# which ranges from 0 to 1 inclusive.

for (threshold in seq(from=-0.01, to=1, by =0.01))
{
  #for each interval in Interval_data set group to 1 if at least one 
  #speech in Speech_data has BACE > threshold for the current interval
  
  #initialize Group to 2 for all rows
  Interval_data$Group<-2
  
  for (interval in seq(from=1, to=660, by =1))
  { 
    #print(interval)
    #create temporary vector for current interval, by grabbing vector of scores for the interval
    current_interval<-Speech_data[Speech_data$Houthis_obs==interval,]$BACE
    #then remove na scores 
    current_interval<-current_interval[!is.na(current_interval)]
    
    #max of BACE for all rows in  Interval_data for current_interval
    
    if (length(current_interval)==0 )
    {}
    else if (max(current_interval) > threshold)
    {
      Interval_data[Interval_data$ID==interval,]$Group<-1
    }
    
    
    #print(paste("interval:", interval, "Max: ", max(current_interval), "threshold: ", threshold, "Group:", Interval_data[Interval_data$ID==interval,]$Group))
  }
  
  
  #run survival 
  #print( 
  logtest <- survdiff(Surv(time=Interval_data$Numberofdays)~Group,data=Interval_data)
    #)
  
  #save the threshold value and p-value to a table. 
  dataGraphing[nrow(dataGraphing) + 1,] <- list(threshold,  logtest$pvalue)
  
}

#plot values from logtest/threshold table.
plot(dataGraphing,xlab= "Threshold" , ylab= "Pvalue", main= "BACE_Houthis")
write.csv(dataGraphing,"Pvalue_BACE_Houthis.csv" , row.names=FALSE)


Hist_BACE <- hist(as.numeric(Speech_data$BACE), breaks = 10)
Hist_table <- data.frame(Hist_BACE$counts, Hist_BACE$mids)
write.csv(Hist_table,"Hist_BACE_Houthis.csv", row.names=FALSE )




################################################################################
#P1#
################################################################################


#dataframe for graphing
dataGraphing = data.frame(matrix(nrow=0,ncol=2))
columns= c("threshold","pvalue")
colnames(dataGraphing)=columns

# We want to do survival for different threshold values of P1
#which ranges from 0 to 1 inclusive.

for (threshold in seq(from= 1.01, to=-1.01, by =-0.01))
  
{
  
  #for each interval in Interval_data set group to 1 if at least one 
  #speech in Speech_data has P1 < threshold for the current interval
  
  #initialize Group to 2 for all rows
  Interval_data$Group<-2
  
  #for each interval in dataHouthisloop set Group to 1 if at least one 
  #speech in speech_data has P1 < threshold for the current interval
  
  
  for (interval in seq(from=1, to=660, by =1))
  { 
    #print(interval)
    #create temporary vector for current interval, by grabbing vector of scores for the interval
    current_interval<-Speech_data[Speech_data$Houthis_obs==interval,]$P1
    #then remove na scores 
    current_interval<-current_interval[!is.na(current_interval)]
    #coerce values to numeric
    #current_interval<-as.numeric(current_interval)
    
    #max of BACE for all rows in  Interval_data for current_interval
    
    if (length(current_interval)==0 )
    {}
    else if (max(current_interval) < threshold)
    {
      Interval_data[Interval_data$ID==interval,]$Group<-1
    }
    
    
    #print(paste("interval:", interval, "Max: ", max(current_interval), "threshold: ", threshold, "Group:", Interval_data[Interval_data$ID==interval,]$Group))
  }
  
  
  #run survival 
  #print(
  logtest <- survdiff(Surv(time=Interval_data$Numberofdays)~Group,data=Interval_data)
  #)
  
  #save the threshold value and p-value to a table. 
  dataGraphing[nrow(dataGraphing) + 1,] <- list(threshold,  logtest$pvalue)
  
  
}


#plot values from logtest/threshold table.
plot(dataGraphing,xlab= "Pvalue" , ylab="Threshold", main= "P1_Houthis")

#names(dataGraphing)
write.csv(dataGraphing,"Pvalue_P1_Houthis.csv" , row.names=FALSE)

Houthis_P1 <- Speech_data$P1
histHezP1 <- hist(as.numeric(Houthis_P1, breaks=10))
tablehounnACH <- data.frame(histHezP1$counts , histHezP1$mids)
write.csv(tablehounnACH,"Hist_P1_Houthis.csv" , row.names=FALSE)



################################################################################
#LTA_Classic_DIS#
################################################################################


#dataframe for graphing
dataGraphing = data.frame(matrix(nrow=0,ncol=2))
columns= c("threshold","pvalue")
colnames(dataGraphing)=columns



# We want to do survival for different threshold values of LTA_Classic_DIS
#which ranges from 0 to 1 inclusive.

for (threshold in seq(from= -0.01, to=1, by =0.01))
  
{
  
  #for each interval in Interval_data set group to 1 if at least one 
  #speech in Speech_data has BACE > threshold for the current interval
  
  #initialize Group to 2 for all rows
  Interval_data$Group<-2
  
  #for each interval in dataHouthisloop set BACE_above to 1 if at least one 
  #speech in dataHouthisloop has BACE_above = 1 for the current interval
  
  
  for (interval in seq(from=1, to=266, by =1))
  { 
    #print(interval)
    #create temporary vector for current interval, by grabbing vector of scores for the interval
    current_interval<-Speech_data[Speech_data$Houthis_obs==interval,]$LTA_Classic_DIS
    #then remove na scores 
    current_interval<-current_interval[!is.na(current_interval)]
    
    #max of BACE for all rows in  Interval_data for current_interval
    
    if (length(current_interval)==0 )
    {}
    else if (max(current_interval) > threshold)
    {
      Interval_data[Interval_data$ID==interval,]$Group<-1
    }
    
    
    #print(paste("interval:", interval, "Max: ", max(current_interval), "threshold: ", threshold, "Group:", Interval_data[Interval_data$ID==interval,]$Group))
  }
  
  
  #run survival 
  #print(
  logtest <- survdiff(Surv(time=Interval_data$Numberofdays)~Group,data=Interval_data)
  #)
  
  #save the threshold value and p-value to a table. 
  dataGraphing[nrow(dataGraphing) + 1,] <- list(threshold,  logtest$pvalue)
  
  
}


#plot values from logtest/threshold table.
plot(dataGraphing ,xlab= "Pvalue" , ylab="Threshold", main = "Houthis_LTA_Classic_DIS")
write.csv(dataGraphing,"Pvalue_LTADIS_Houthis.csv" , row.names=FALSE)

# tables 

histogram <-hist(Speech_data$LTA_Classic_DIS, breaks=10)
tablehamDIS <- data.frame(histogram$counts , histogram$mids)
write.csv(tablehamDIS,"Hist_LTADIS_Houthis.csv" , row.names=FALSE)


################################################################################
#nPWR_score#
################################################################################



#dataframe for graphing
dataGraphing = data.frame(matrix(nrow=0,ncol=2))
columns= c("threshold","pvalue")
colnames(dataGraphing)=columns


# We want to do survival analysis for different threshold values of nPWR_score
# which ranges from 0 to 1 inclusive.


for (threshold in seq(from=-0.01, to=1, by =0.01))
{
  #for each interval in Interval_data set group to 1 if at least one 
  #speech in Speech_data has nPWR_score > threshold for the current interval
  
  #initialize Group to 2 for all rows
  Interval_data$Group<-2
  
  for (interval in seq(from=1, to=660, by =1))
  { 
    #print(interval)
    #create temporary vector for current interval, by grabbing vector of scores for the interval
    current_interval<-Speech_data[Speech_data$Houthis_obs==interval,]$nPWR_score
    #then remove na scores 
    current_interval<-current_interval[!is.na(current_interval)]
    #coerce values to numeric
    current_interval<-as.numeric(current_interval)
    
    #print(paste("Interval: ", interval, " values: "))
    #print(current_interval)
    
    #max of nPWR_score for all rows in  Interval_data for current_interval
    
    if (length(current_interval)==0 )
    {}
    else if (max(current_interval) > threshold)
    {
      Interval_data[Interval_data$ID==interval,]$Group<-1
    }
    
    
    #print(paste("interval:", interval, "Max: ", max(current_interval), "threshold: ", threshold, "Group:", Interval_data[Interval_data$ID==interval,]$Group))
  }
  
  print(paste("threshold: ", threshold))
  #run survival 
  print (logtest <- survdiff(Surv(time=Interval_data$Numberofdays)~Group,data=Interval_data))
  
  #save the threshold value and p-value to a table. 
  dataGraphing[nrow(dataGraphing) + 1,] <- list(threshold,  logtest$pvalue)
  
}

#max(Speech_data$nPWR_score)

#plot values from logtest/threshold table.
plot(dataGraphing,ylab= "Pvalue" , xlab="Threshold", main= "nPWR_Houthis")
write.csv(dataGraphing,"Pvalue_nPWR_Houthis.csv" , row.names=FALSE)


Houthi_nPWR_score <- Speech_data$nPWR_score
histHouthi_nPWR_score<-hist(as.numeric(Houthi_nPWR_score), breaks = 10)
tablenPWR_score <- data.frame(histHouthi_nPWR_score$counts , histHouthi_nPWR_score$mids)
write.csv(tablenPWR_score,"Hist_nPWR_Houthis.csv" , row.names=FALSE)


################################################################################
#nACH_Score#
################################################################################



#dataframe for graphing
dataGraphing = data.frame(matrix(nrow=0,ncol=2))
columns= c("threshold","pvalue")
colnames(dataGraphing)=columns

# We want to do survival for different threshold values of nACH_Score
#which ranges from 0 to 1 inclusive.

for (threshold in seq(from=0, to=0.03, by =0.001))
{
  #for each interval in Interval_data set group to 1 if at least one 
  #speech in Speech_data has nACH_Score > threshold for the current interval
  
  #initialize Group to 2 for all rows
  Interval_data$Group<-2
  
  for (interval in seq(from=1, to=266, by =1))
  { 
    #print(interval)
    #create temporary vector for current interval, by grabbing vector of scores for the interval
    current_interval<-Speech_data[Speech_data$Houthis_obs==interval,]$nACH_Score
    #then remove na scores 
    current_interval<-current_interval[!is.na(current_interval)]
    #coerce values to numeric
    current_interval<-as.numeric(current_interval)
    
    #max of nACH_Score for all rows in  Interval_data for current_interval
    
    if (length(current_interval)==0 )
    {}
    else if (max(current_interval) > threshold)
    {
      Interval_data[Interval_data$ID==interval,]$Group<-1
    }
    
    
    #print(paste("interval:", interval, "Max: ", max(current_interval), "threshold: ", threshold, "Group:", Interval_data[Interval_data$ID==interval,]$Group))
  }
  
  print(paste("threshold: ", threshold))
  #run survival 
  print (logtest <- survdiff(Surv(time=Interval_data$Numberofdays)~Group,data=Interval_data))
  
  
  #save the threshold value and p-value to a table. 
  dataGraphing[nrow(dataGraphing) + 1,] <- list(threshold,  logtest$pvalue)
  
}



#plot values from logtest/threshold table.
plot(dataGraphing,ylab= "Pvalue" , xlab="Threshold", main= "nACH_Houthis")
write.csv(dataGraphing,"Pvalue_nACH_Houthis.csv" , row.names=FALSE)


Hez_nACH_Score <- Speech_data$nACH_Score
histHeznach_SC<-hist(Hez_nACH_Score, breaks = 10)
tableheznACH <- data.frame(histHeznach_SC$counts , histHeznach_SC$mids)
write.csv(tableheznACH,"Hist_nACH_Houthis.csv" , row.names=FALSE)